naL1 <- fread("rorb_annuli_stats_GoogleDoc.csv", header = FALSE)
cnames <- colnames(naL1)
meta <- fread("GoogleDocData.csv")
loc <- fread("GoogleDocData.csv")[, 1:3]
ids <- as.data.frame(read.csv("GoogleDocData.csv")[, 4])
colnames(ids) <- 'id'
ccolL1 <- c('blue', 'blue', 'blue', 'red', 'red', 'red', 'black', 'black', 'green', 'green', 'black', 'green')
ccolL1 <- rep(ccolL1, each = 15)
CHAN_NAMES <- rep(c('DAPI1', 'DAPI2', 'DAPI3', 'GABA', 'GAD2', 'Gephyrin', 'GluN1',
'MBP', 'PSD95', 'synapsin', 'TdTomato', 'VGlut1'), each = 15)
colnames(naL1) <- paste0(CHAN_NAMES,"_", sprintf("%02d",1:15))snaL1 <- scale(naL1, center = TRUE, scale = TRUE)
pc1 <- princomp(snaL1)
gaba <- as.factor(meta$gaba)Next, applying LDA and QDA iterating on the number of PCA dimension included.
ldaL1 <- foreach(x = 1:ncol(snaL1)) %dopar% {
ldatmp <- lda(gaba ~ pc1$scores[, 1:x], CV = FALSE)
ldatmp
}
qdaL1 <- foreach(x = 1:ncol(snaL1)) %dopar% {
qdatmp <-
tryCatch(qda(as.factor(gaba) ~ pc1$scores[, 1:x], CV = FALSE),
error=function(err) NA)
qdatmp
}
rfL1 <- foreach(x = 1:ncol(snaL1)) %dopar% {
tmpdat <- data.table(gaba = gaba, pc1$scores[, 1:x])
rftmp <- randomForest(gaba ~ ., data = tmpdat, prox = TRUE)
rftmp
}
ll <- sapply(ldaL1, function(x) sum(predict(x)$class != gaba)/length(gaba))
ql <- sapply(qdaL1[!is.na(qdaL1)], function(x) sum(predict(x)$class != gaba)/length(gaba))
rl <- sapply(rfL1, function(x) sum(predict(x) != gaba)/length(gaba))plot(x = 1:ncol(pc1$scores), y = seq(0,max(ll,ql,rl),length = ncol(pc1$scores)), type = 'n', xlab = expression(hat(d)), ylab = "L")
points(ll, type = 'b', col = 'blue', pch = 20, cex = 0.5)
points(ql, type = 'b', col = 'orange', pch = 15, cex = 0.5)
points(rl, type = 'b', col = 'darkgreen', lty = 2, pch = 24, cex = 0.2)
abline(h = chance <- sum(gaba == 1)/length(gaba), col = 'magenta', lty=4)
text(40,min(ql), label = paste("qda", min(round(ql, 3))), col = 'orange', pos = 4)
text(ncol(snaL1)/2, min(ll), label = paste("lda", min(round(ll,3))), col = 'blue', pos = 1)
text(ncol(snaL1)/3, min(rl), label = paste("rf", min(round(rl,3))), col = 'darkgreen')
text(ncol(snaL1)/4, chance, label = "chance", col = 'magenta', pos = 3)naInf <- fread("rorb_annuli_stats_GoogleDocInf.csv", header = FALSE)
cnames <- colnames(naInf)
meta <- fread("GoogleDocData.csv")
loc <- fread("GoogleDocData.csv")[, 1:3]
ids <- as.data.frame(read.csv("GoogleDocData.csv")[, 4])
colnames(ids) <- 'id'
ccolLinf <- c('blue', 'blue', 'blue', 'red', 'red', 'red', 'black', 'black', 'green', 'green', 'black', 'green')
ccolLinf <- rep(ccolLinf, each = 5)
CHAN_NAMES <- rep(c('DAPI1', 'DAPI2', 'DAPI3', 'GABA', 'GAD2', 'Gephyrin', 'GluN1',
'MBP', 'PSD95', 'synapsin', 'TdTomato', 'VGlut1'), each = 5)
colnames(naInf) <- paste0(CHAN_NAMES,"_", sprintf("%d",1:5))
## annotation ids in the BOSS are +1 from the ids in Forrest's gsheet.
#annoSizes <- read.csv('annotation_sizes_pixels.csv')
#annoSizes$id <- annoSizes$id - 1
#
#tmp <- merge(ids, annoSizes, by = "id", all.x = TRUE)snaLinf <- scale(naInf, center = TRUE, scale = TRUE)
pcInf <- princomp(snaLinf)
gaba <- as.factor(meta$gaba)Next, applying LDA and QDA iterating on the number of PCA dimension included.
ldaLinf <- foreach(x = 1:ncol(snaLinf)) %dopar% {
ldatmp <- lda(gaba ~ pcInf$scores[, 1:x], CV = FALSE)
ldatmp
}
qdaLinf <- foreach(x = 1:ncol(snaLinf)) %dopar% {
qdatmp <-
tryCatch(qda(as.factor(gaba) ~ pcInf$scores[, 1:x], CV = FALSE),
error=function(err) NA)
qdatmp
}
rfLinf <- foreach(x = 1:ncol(snaLinf)) %dopar% {
tmpdat <- data.table(gaba = gaba, pcInf$scores[, 1:x])
rftmp <- randomForest(gaba ~ ., data = tmpdat, prox = TRUE)
rftmp
}
ll <- sapply(ldaLinf, function(x) sum(predict(x)$class != gaba)/length(gaba))
ql <- sapply(qdaLinf, function(x) sum(predict(x)$class != gaba)/length(gaba))
rl <- sapply(rfLinf, function(x) sum(predict(x) != gaba)/length(gaba))plot(x = 1:ncol(pcInf$scores), y = seq(0,max(ll,ql,rl),length = ncol(pcInf$scores)), type = 'n', xlab = expression(hat(d)), ylab = "L")
points(ll, type = 'b', col = 'blue', pch = 20, cex = 0.5)
points(ql, type = 'b', col = 'orange', pch = 15, cex = 0.5)
points(rl, type = 'b', col = 'darkgreen', lty = 2, pch = 24, cex = 0.2)
abline(h = chance <- sum(gaba == 1)/length(gaba), col = 'magenta', lty=4)
text(40,min(ql), label = paste("qda", min(round(ql, 3))), col = 'orange', pos = 4)
text(ncol(snaLinf)/2, min(ll), label = paste("lda", min(round(ll,3))), col = 'blue', pos = 1)
text(ncol(snaLinf)/3, min(rl), label = paste("rf", min(round(rl,3))), col = 'darkgreen')
text(ncol(snaLinf)/4, chance, label = "chance", col = 'magenta', pos = 3)